



* This file creates the replication data file for Politics with the People
* Note: DG is "deliberative group" arm, IO is "information only" arm, and TC is "true control" arm
* This is a basic replication file that shows the structure of the data and exclusions
* Please direct questions to Kevin Esterling (kevin.esterling@ucr.edu)



* Load the master data file, replacing whatever is currently in memory.
use "mainknall.dta", clear

* Fix up missing data: in every variable whose name starts with fol, bas,
* bgm or nov, values from -2 through -1 become system-missing.
foreach stub in fol bas bgm nov {
    recode `stub'* (-2/-1=.)
}

* discpart: code 2 is folded into 1 and system-missing becomes 0.
recode discpart (.=0) (2=1)

* The two completion flags: system-missing becomes 0.
foreach flag in follcomp bgm_comp {
    recode `flag' (.=0)
}

* Assignment (Z) indicators, all derived from exp_grp.
generate asstre = inlist(exp_grp, 1, 2)    /* assigned to DG arm */
generate asspar = (exp_grp == 4)           /* assigned to IO arm */
generate asstru = !(asstre | asspar)       /* assigned to TC arm (neither DG nor IO) */

* Vendor indicators: one 0/1 flag per value of sample
* (gmi <-> 3, ssi <-> 2, kn <-> 1), created in that order.
local vendors gmi ssi kn
local codes   3 2 1
forvalues i = 1/3 {
    local vendor : word `i' of `vendors'
    local code   : word `i' of `codes'
    generate `vendor' = (sample == `code')
}


* Create mutually exclusive and exhaustive categories to classify how everyone
* branched down to the followup survey.
* See appendix to the POQ paper on knowledge gains for a figure.
*
* t<b><d><f>: the three letters record bgm_comp, discpart and follcomp, in
* that order (y = 1, n = 0).  The cells with discpart==0 are additionally
* restricted to exp_grp 1 or 2; the cells with discpart==1 are not.
foreach b in y n {
    local bgm = ("`b'" == "y")
    foreach d in y n {
        local disc = ("`d'" == "y")
        if `disc' {
            local arm "1"
        }
        else {
            local arm "inlist(exp_grp, 1, 2)"
        }
        foreach f in y n {
            local foll = ("`f'" == "y")
            generate t`b'`d'`f' = (`arm' & bgm_comp == `bgm' & discpart == `disc' & follcomp == `foll')
        }
    }
}
generate txxx_all = tyyy + tyyn + tyny + tynn + tnyy + tnyn + tnny + tnnn

* p<b><f>: exp_grp 4 with slfrpt below 3 and not in tyyy; letters record
* bgm_comp and follcomp (y = 1, n = 0).
foreach b in y n {
    local bgm = ("`b'" == "y")
    foreach f in y n {
        local foll = ("`f'" == "y")
        generate p`b'`f' = (exp_grp == 4 & tyyy == 0 & slfrpt < 3 & bgm_comp == `bgm' & follcomp == `foll')
    }
}

* ty / tn: exp_grp 3 with slfrpt below 3 and not in tnyy, split on follcomp.
generate ty = (exp_grp == 3 & slfrpt < 3 & tnyy == 0 & follcomp == 1)
generate tn = (exp_grp == 3 & slfrpt < 3 & tnyy == 0 & follcomp == 0)

* exp_grp 0 with slfrpt 4, split by whether the vendor flag kn is 1 or 0.
generate slf_nonpart_kn  = (kn == 1 & exp_grp == 0 & slfrpt == 4)
generate slf_nonpart_ssi = (kn == 0 & exp_grp == 0 & slfrpt == 4)
generate slf_nonpart = slf_nonpart_kn + slf_nonpart_ssi

* slfrpt 3 and not in tyyy, then the same split by exp_grp / bgm_comp / follcomp.
generate surv_only = (tyyy == 0 & slfrpt == 3)
generate surv_only_ty = (surv_only == 1 & exp_grp == 3 & bgm_comp == 0 & follcomp == 1)
generate surv_only_tn = (surv_only == 1 & exp_grp == 3 & bgm_comp == 0 & follcomp == 0)
foreach b in y n {
    local bgm = ("`b'" == "y")
    foreach f in y n {
        local foll = ("`f'" == "y")
        generate surv_only_p`b'`f' = (slfrpt == 3 & tyyy == 0 & exp_grp == 4 & bgm_comp == `bgm' & follcomp == `foll')
    }
}

* exp_grp 0 cases, split by slfrpt (exactly 3 versus below 3).
generate surv_only_nonpart   = (exp_grp == 0 & slfrpt == 3)
generate treatmentok_nonpart = (exp_grp == 0 & slfrpt < 3)

* Summaries of the branch cells above; some of these are used further down.
* "_pass" totals collect the follcomp==1 cells, "noncomp_" totals the follcomp==0 cells.
generate treatment_pass = tnyy + tyyy
generate partial_pass   = tyny + pyy + surv_only_pyy
generate true_pass      = ty + tnny + pny + surv_only_ty + surv_only_pny

generate noncomp_treatment_pass = tnyn + tyyn
generate noncomp_partial_pass   = tynn + pyn + surv_only_pyn
generate noncomp_true_pass      = tn + tnnn + pnn + surv_only_tn + surv_only_pnn

generate comp_pass = treatment_pass + partial_pass + true_pass
* NOTE(review): unlike noncomplier below, this total leaves out the
* surv_only_* cells and slf_nonpart -- confirm that is intended.
generate noncomp_pass = tyyn + tynn + tnyn + tnnn + pyn + pnn + tn
* complier is the same total as comp_pass.
generate complier = comp_pass
generate noncomplier = slf_nonpart + noncomp_treatment_pass + noncomp_partial_pass + noncomp_true_pass
generate surveys = partial_pass + true_pass

* slfpart is 1 for everyone who is not in slf_nonpart.
generate slfpart = !slf_nonpart
rename surv_only survonly

* Exposure variables: each is the sum of the matching "_pass" and
* "noncomp_..._pass" totals.
*   treatment = exposed to DG treatment
*   partial   = exposed to IO treatment
*   true      = true controls
foreach arm in treatment partial true {
    generate `arm' = `arm'_pass + noncomp_`arm'_pass
}

* This is a randomization that we did not use, which tracks whether the
* session was a "question" (exp_grp == 1) or a "comment" (exp_grp == 2) frame.
forvalues frame = 1/2 {
    generate tcond`frame' = (treatment == 1 & exp_grp == `frame')
}


* Drop the nonparticipants, who refused to participate in the surveys.
* (These drops are active; comment them out to keep those rows.)
drop if treatmentok_nonpart | surv_only_nonpart
*drop if slf_nonpart==1

* Blank out assignment/exposure for rows with slfpart==0.  These rows are
* removed by the drop that follows, so this only matters if that drop is
* commented out.
foreach v in asstre asspar survonly treatment partial {
    replace `v' = . if slfpart == 0
}
drop if !slfpart
drop slfpart


* Build the district index cd as consecutive group codes of geo_grp.
* Any existing cd is removed first (no error if there is none).
capture drop cd
* geo_grp 15: this session did not occur; geo_grp 99: nonparticipants.
drop if inlist(geo_grp, 15, 99)
* replace geo_grp=. if geo_grp==99
egen cd = group(geo_grp)
* Rows that received no group code are removed.
drop if missing(cd)



* Create the indicator of who completed the bgm: bgmcomp is 1 when fewer
* than 17 of the variables matching bgm* are missing, and 0 otherwise.
* A temporary variable holds the missing count so this step cannot collide
* with (or abort on) an existing variable named x.
* NOTE(review): the bgm* wildcard also matches bgm_comp, which is recoded
* earlier in this file so that it is never system-missing -- confirm the
* threshold of 17 accounts for that.
tempvar n_missing
egen `n_missing' = rowmiss(bgm*)
generate bgmcomp = (`n_missing' < 17)
drop `n_missing'
* bgmcomp is set to missing for everyone assigned to the TC arm.
replace bgmcomp = . if asstru == 1


* Make follcomp and discpart not factors: each is rebuilt as a fresh 0/1
* variable (1 where the old value was 1) and the old variable is discarded.
foreach v in follcomp discpart {
    rename `v' `v'_old
    generate `v' = (`v'_old == 1)
    drop `v'_old
}

* Only those who did bgm survey and/or participated in DG event (if asstre or
* asspar) are eligible for the follow up, per revised agreement with KN.
* follelig is 0 only for DG/IO assignees with bgmcomp==0 and discpart==0.
generate follelig = !(inlist(1, asstre, asspar) & bgmcomp == 0 & discpart == 0)
* Follow-up completion is undefined for those who were not eligible.
replace follcomp = . if follelig == 0
*******

* We don't observe treatment compliance among those denied the treatment,
* so discpart is set to missing wherever asstre is 0.
replace discpart = . if asstre == 0

* Create the indicator of who completed the november survey: novcomp is 1
* when fewer than 63 of the variables matching nov* are missing.
* A temporary variable holds the missing count so this step cannot collide
* with (or abort on) an existing variable named x.
tempvar n_missing
egen `n_missing' = rowmiss(nov*)
generate novcomp = (`n_missing' < 63)
drop `n_missing'

* Only those who did the session or did the follow up are eligible for the
* november survey; novcomp is missing for everyone else.
generate novelig = (discpart == 1 | follcomp == 1)
replace novcomp = . if novelig == 0


* how did these two participants get into the treatment?  In any case.....
* Anyone with treatment==1 is marked as assigned to the DG arm (asstre=1).
* NOTE(review): asstru, bgmcomp, follelig and discpart were all derived from
* the earlier value of asstre and are not recomputed after this change --
* confirm that is intended before relying on them for these cases.
replace asstre=1 if treatment==1



* Housekeeping for independent variables.

* Knowledge items: bas5_d already exists (system-missing becomes 0); the
* other four flags are 1 when the item equals the listed value
* (3 for bas6 and bas7, 2 for bas8 and bas9).
recode bas5_d (.=0)
forvalues i = 6/9 {
    local key = cond(`i' <= 7, 3, 2)
    generate bas`i'_d = (bas`i' == `key')
}
generate genknow = bas5_d + bas6_d + bas7_d + bas8_d + bas9_d
* genknowd flags a genknow score above 3.
generate genknowd = (genknow > 3)

* ppwork is collapsed in place (-2 -> missing, 1-3 -> 0, 4-9 -> 1); work is
* then 1 where the collapsed value is 1, and missing where ppwork is missing.
recode ppwork (-2=.) (1/3=0) (4/9=1)
generate work = (ppwork == 1) if ppwork != .

* Demographic dummies; each is missing where its source variable is
* system-missing.
generate somecollege   = (ppeducat == 3) if ppeducat != .
generate collegeormore = (ppeducat == 4) if ppeducat != .
generate female        = (ppgender == 2) if ppgender != .
generate age = ppage
generate white         = (ppethm == 1) if ppethm != .

* Dichotomize the need for cognition/evaluation variables.
* bas45-bas47 are flagged when the response is below 3; bas48 is flagged when
* it is above 3.  Each flag is missing where its item is system-missing.
foreach item in bas45 bas46 bas47 {
    generate `item'd = (`item' < 3) if `item' != .
}
generate bas48d = (bas48 > 3) if bas48 != .
* egen temp=rowmiss(bas45 bas46 bas47 bas48)
* keep if temp==0
* drop temp

* Did the answer to fol10a appear (randomly) in bgm materials (1 if yes)?
* We did not analyze this.  System-missing rot_2 is treated as 0.
replace rot_2 = 0 if rot_2 == .
generate ans10a = (rot_2 == 1)
